Overall Objective

Load Libraries

library(tidyverse)
library(cowplot)
library(broom)
library(pwr)
library(plotly)

Import data

#setwd("~/GitHub/time-course/data")
setwd("~/Library/Mobile\ Documents/com~apple~CloudDocs/time-course/data")

rawdata <- "revised_MASTER-ExperimentSummary.csv"
timecourse <- "timecourse2017.csv"


data <- read_csv(rawdata)
tc <- read_csv(timecourse, na = c("","NA"))

Convert data from ‘wide’ to ‘long’ format

data1 <- data %>%
  gather(Sample,Count,2:250)

# Separate samples by identifiers 
data2 <- data1 %>% 
  separate(Sample, into=c("Sample_ID","Dilution_factor","Injection","Tech_rep", sep = "_")) %>% 
  select(-`_`)

Factor the data into categorical variables

# Refactoring Columns for samples
data2$Sample_ID <- as.factor(data2$Sample_ID)
data2$Dilution_factor <- as.numeric(data2$Dilution_factor)
data2$Injection<- as.factor(data2$Injection)
data2$Tech_rep <- as.numeric(data2$Tech_rep)


# Refactoring COlumns for timecourse
tc$Sample_ID <- as.factor(tc$Sample_ID)
tc$Day <- as.factor(tc$Day)
tc$Weight <- as.numeric(tc$Weight)
tc$TEI_Day <- as.factor(tc$TEI_Day)
tc1 <- tc %>% 
  select(Day:Pups)
tc1
## # A tibble: 40 × 5
##       Day Sample_ID TEI_Day Weight  Pups
##    <fctr>    <fctr>  <fctr>  <dbl> <int>
## 1       1         1       6  19.60     0
## 2       1         2       2  18.60     0
## 3       1         3       1  19.30     0
## 4       1         4       3  20.50     0
## 5       1         5       4  18.74     0
## 6       1         6       5  18.30     0
## 7       5         7       6  20.83    10
## 8       5         8       3  19.88    10
## 9       5         9       2  24.41     9
## 10      5        10       4  20.01     6
## # ... with 30 more rows

Back calculate the original concentration of the sample

data2 <- data2 %>% 
  mutate(True_Count=Dilution_factor*Count)
data2
## # A tibble: 249,000 × 7
##    particle_size Sample_ID Dilution_factor Injection Tech_rep Count
##            <dbl>    <fctr>           <dbl>    <fctr>    <dbl> <int>
## 1            0.5         2             500         1        0     0
## 2            1.5         2             500         1        0     0
## 3            2.5         2             500         1        0     0
## 4            3.5         2             500         1        0     0
## 5            4.5         2             500         1        0     0
## 6            5.5         2             500         1        0     0
## 7            6.5         2             500         1        0     0
## 8            7.5         2             500         1        0     0
## 9            8.5         2             500         1        0     0
## 10           9.5         2             500         1        0     0
## # ... with 248,990 more rows, and 1 more variables: True_Count <dbl>

Average the three technical replicate readings

data3 <- data2 %>% 
  group_by(particle_size,Sample_ID,Dilution_factor,Injection) %>% 
  summarise( tech_N = length(True_Count),
             tech_mean = mean(True_Count),
             tech_sd = sd(True_Count),
             tech_se = tech_sd/sqrt(tech_N))
data3
## Source: local data frame [82,000 x 8]
## Groups: particle_size, Sample_ID, Dilution_factor [?]
## 
##    particle_size Sample_ID Dilution_factor Injection tech_N tech_mean
##            <dbl>    <fctr>           <dbl>    <fctr>  <int>     <dbl>
## 1            0.5         1             500         1      3         0
## 2            0.5         1             500         2      3         0
## 3            0.5        10             500         1      3         0
## 4            0.5        10             500         2      3         0
## 5            0.5        11             500         1      3         0
## 6            0.5        11             500         2      3         0
## 7            0.5        12             500         1      6         0
## 8            0.5        12             500         2      3         0
## 9            0.5        13             500         1      3         0
## 10           0.5        13             500         2      3         0
## # ... with 81,990 more rows, and 2 more variables: tech_sd <dbl>,
## #   tech_se <dbl>
test1 <- left_join(tc1,data3, by= "Sample_ID")

Summarize samples by injection (average both injections)

data4 <- data3 %>% 
  group_by(particle_size,Sample_ID,Dilution_factor) %>% 
  summarise( inj_N = length(tech_mean),
             inj_mean = mean(tech_mean),
             inj_sd = sd(tech_mean),
             inj_se = inj_sd/sqrt(inj_N))
data4
## Source: local data frame [41,000 x 7]
## Groups: particle_size, Sample_ID [?]
## 
##    particle_size Sample_ID Dilution_factor inj_N inj_mean inj_sd inj_se
##            <dbl>    <fctr>           <dbl> <int>    <dbl>  <dbl>  <dbl>
## 1            0.5         1             500     2        0      0      0
## 2            0.5        10             500     2        0      0      0
## 3            0.5        11             500     2        0      0      0
## 4            0.5        12             500     2        0      0      0
## 5            0.5        13             500     2        0      0      0
## 6            0.5        14             500     2        0      0      0
## 7            0.5        15             500     2        0      0      0
## 8            0.5        16             500     2        0      0      0
## 9            0.5        17             500     2        0      0      0
## 10           0.5        18             500     2        0      0      0
## # ... with 40,990 more rows
test2 <- left_join(tc1,data4, by= "Sample_ID")

test2
## # A tibble: 41,000 × 11
##       Day Sample_ID TEI_Day Weight  Pups particle_size Dilution_factor
##    <fctr>     <chr>  <fctr>  <dbl> <int>         <dbl>           <dbl>
## 1       1         1       6   19.6     0           0.5             500
## 2       1         1       6   19.6     0           1.5             500
## 3       1         1       6   19.6     0           2.5             500
## 4       1         1       6   19.6     0           3.5             500
## 5       1         1       6   19.6     0           4.5             500
## 6       1         1       6   19.6     0           5.5             500
## 7       1         1       6   19.6     0           6.5             500
## 8       1         1       6   19.6     0           7.5             500
## 9       1         1       6   19.6     0           8.5             500
## 10      1         1       6   19.6     0           9.5             500
## # ... with 40,990 more rows, and 4 more variables: inj_N <int>,
## #   inj_mean <dbl>, inj_sd <dbl>, inj_se <dbl>

Quick visualizations

Graphing all samples

test1$Sample_ID_correct = factor(test1$Sample_ID, levels=c('1','2','3','4','5','6','7','8','9','10','11','12','13','14','15','16','17','18','19','20','21','22','23','24','25','26','27','28','29','30','31','32','33','34','35','36','70','73','74','75'))

graph1 <- test1 %>%
  ggplot(aes(x=particle_size, y=tech_mean,color=Injection ))+ #plot
  geom_ribbon(aes(ymin=tech_mean-tech_se, ymax=tech_mean+tech_se),alpha=0.2,fill = alpha('grey12', 0.2)) + #error bars
  geom_line(size=2.0) + xlim(0,500)+ #line size, x-axis scale
  scale_y_continuous(expand=c(0,0))+ #set bottom of graph
  xlab("Particle Size") + # X axis label
  ylab("\nMean Particle Concentration/ml\n") + # Y axis label
  ggtitle("Nanosight Histogram of\nVirgin Mouse Plasma")+ #title
  labs(color="Injection")+ #Label table title
  facet_wrap( ~ Sample_ID_correct, nrow=7)

graph1

Graphing averaged samples by experimental day

graph2 <- test2 %>%
  group_by(TEI_Day) %>% 
  ggplot(aes(x=particle_size, y=inj_mean,color=Day ))+ #plot
  #geom_ribbon(aes(ymin=inj_mean-inj_se, ymax=inj_mean+inj_se),alpha=0.2,fill = alpha('grey12', 0.2)) + #error bars
  geom_line(size=2) + xlim(0,500)+ #line size, x-axis scale
  scale_y_continuous(expand=c(0,0))+ #set bottom of graph
  xlab("Particle Size") + # X axis label
  ylab("\nMean Particle Concentration/ml\n") + # Y axis label
  ggtitle("Nanosight Histogram of\nVirgin Mouse Plasma")+ #title
  labs(color="Condition")+ #Label table title
  facet_wrap(~ TEI_Day, ncol=7)

graph2

### Particle concentration values for each of the 36 samples

test3 <- test2 %>% 
  group_by(Day,Sample_ID) %>% 
  summarise(particle_conc=sum(inj_mean))
test3
## Source: local data frame [40 x 3]
## Groups: Day [?]
## 
##       Day Sample_ID particle_conc
##    <fctr>     <chr>         <dbl>
## 1       1         1  124235076333
## 2       1         2   89249062500
## 3       1         3  175595167167
## 4       1         4  153741474667
## 5       1         5  182057049833
## 6       1         6  282458569250
## 7       5        10  306075805167
## 8       5        11  241497990000
## 9       5        12  336677212208
## 10      5         7  167230730583
## # ... with 30 more rows

Summary statistics of particle concentration (averaging n=6 for each time point)

test4 <- test3 %>% 
  group_by(Day) %>% 
  summarise(Day_N=length(particle_conc),
            Day_mean = mean(particle_conc),
            Day_sd = sd(particle_conc),
            Day_se = Day_sd/sqrt(Day_N))
test4
## # A tibble: 6 × 5
##      Day Day_N     Day_mean       Day_sd      Day_se
##   <fctr> <int>        <dbl>        <dbl>       <dbl>
## 1      1     6 167889399958  65842785695 26880204699
## 2      5     6 234722109938  73806691215 30131455513
## 3     10     6 258974135458 101749179329 41538928517
## 4     14     7 391625801212  95846005651 36226385016
## 5     17     9 313145842646 148840936571 49613645524
## 6     20     6 214896696056  50294400099 20532602860

Boxplot

plot1 <- test3 %>% 
  filter(!Sample_ID %in% c('6','28','32')) %>% 
  group_by(Day) %>% 
  ggplot(aes(x= Day, y = particle_conc, color=Day)) +
  geom_boxplot(colour="black",fill=NA) + 
  geom_point(aes(text = paste("Sample ID:", Sample_ID)),
             position='jitter',size=3)+
  xlab("\nDay of Gestation\n") + # X axis label
  ylab("\nExosomes/ml\n") + # Y axis label
  ggtitle("Plasma Exosome Concentration\nThroughout Pregnancy\n")+ #title
  labs(color="Condition")+ # Label table title
  scale_x_discrete(breaks=c("1","5","10","14","17","20"),  # Change X axis label
                   labels=c("Virgin","5","10","14","17","1 Day Post")) +
  scale_color_discrete(labels=c("Virgin","5","10","14","17","1 Day Post")) # Change Legend
  
plot1

#ggsave("Exosome_plot.png", height = 5, width = 7, units = "in", dpi = 600)

Plotly

  ggplotly(plot1)

Bar plot

plot <- test4 %>% 
  ggplot(aes(x=Day, y=Day_mean, fill=Day ))+ #plot
  geom_col()+
  geom_errorbar(aes(ymin=Day_mean-Day_se, ymax=Day_mean+Day_se), width=.5, 
                size=0.8, colour="black", position=position_dodge(.9)) + #error bars
  scale_y_continuous(expand=c(0,0), breaks = seq(1E11,4E11,1E11))+ #set bottom of graph and scale
  xlab("\nDay of Gestation\n") + # X axis label
  ylab("\nExosomes/ml\n") + # Y axis label
  ggtitle("Plasma Exosome Concentration\nThroughout Pregnancy\n")+ #title
  labs(fill="Condition") + # Label table title
  scale_x_discrete(breaks=c("1","5","10","14","17","20"),  # Change X axis label
                   labels=c("Virgin","5","10","14","17","1 Day Post")) +
  scale_fill_discrete(labels=c("Virgin","5","10","14","17","1 Day Post"))

plot

Looking at Variation between the days the samples were run

test7 <- test3 %>% 
  left_join(tc1)
## Joining, by = c("Day", "Sample_ID")
plot2 <- test7 %>%
  ggplot(aes(x = Day, y = particle_conc, color = Day, shape=TEI_Day))+
  geom_point(position= 'dodge',size=4)+
  scale_shape_manual(values=c(15,16,17,18,22,23,24))+
  xlab("\nDay of Gestation\n") + # X axis label
  ylab("\nExosomes/ml\n") + # Y axis label
  ggtitle("Plasma Exosome Concentration\nThroughout Pregnancy\n")+ #title
  labs(color="Condition") + # Label table title
  scale_x_discrete(breaks=c("1","5","10","14","17","20"),  # Change X axis label
                   labels=c("Virgin","5","10","14","17","1 Day Post")) +
  scale_color_discrete(labels=c("Virgin","5","10","14","17","1 Day Post"))

plot2

ggplotly(plot2)
## Warning: Width not defined. Set with `position_dodge(width = ?)`

Looking at nanoparticle range

nano_100 <- data4 %>% 
  filter(particle_size<140.5)

nano_100_data <- left_join(tc1,nano_100, by= "Sample_ID")

nano_100_data_plot <- nano_100_data %>%
  group_by(Day,Sample_ID) %>% 
  summarise(particle_conc=sum(inj_mean)) %>% 
  filter(!Sample_ID %in% c('6','28','32')) %>% 
  ggplot(aes(factor(Day),particle_conc, color=Day)) +
  geom_boxplot(colour="black",fill=NA) + 
  geom_point(position='jitter',size=3)+
  xlab("\nDay of Gestation\n") + # X axis label
  ylab("\nExosomes/ml\n") + # Y axis label
  ggtitle("Plasma Exosome Concentration\nThroughout Pregnancy\n")+ #title
  labs(color="Condition") + # Label table title
  scale_x_discrete(breaks=c("1","5","10","14","17","20"),  # Change X axis label
                   labels=c("Virgin","5","10","14","17","1 Day Post")) +
  scale_color_discrete(labels=c("Virgin","5","10","14","17","1 Day Post"))

nano_100_data_plot   

Statistics

Shapiro test

tidy(shapiro.test(test3$particle_conc))
##   statistic   p.value                      method
## 1 0.9577149 0.1398459 Shapiro-Wilk normality test

p value >0.05 therefore conlude data is normally distributed

ANOVA

jacob <- nano_100_data %>%
   group_by(Day,Sample_ID) %>% 
  summarise(particle_conc=sum(inj_mean)) %>% 
  filter(!Sample_ID %in% c('6','28','32'))
         
fit <- aov(particle_conc ~ Day, data=jacob)
stats <- tidy(fit)
stats
##        term df        sumsq       meansq statistic      p.value
## 1       Day  5 1.765606e+23 3.531211e+22  5.741935 0.0007319539
## 2 Residuals 31 1.906458e+23 6.149863e+21        NA           NA

Statistically significant, thus Tukey’s HSD post hoc analysis can determine significant differences.

Tukey Post Hoc Test

HSD <- TukeyHSD(fit)
tukey <- tidy(HSD)
tukey
##    term comparison      estimate      conf.low    conf.high adj.p.value
## 1   Day        5-1   85375863988  -58753165966 229504893941 0.481653414
## 2   Day       10-1   89284191842  -54844838111 233413221795 0.432575576
## 3   Day       14-1  201347532652   61976554110 340718511195 0.001588987
## 4   Day       17-1  186209230790   50516235582 321902225997 0.002888917
## 5   Day       20-1   73663702367  -76874159770 224201564503 0.675812554
## 6   Day       10-5    3908327854 -133513310230 141329965939 0.999999250
## 7   Day       14-5  115971668665  -16451135257 248394472587 0.113326953
## 8   Day       17-5  100833366802  -27712805008 229379538612 0.194145005
## 9   Day       20-5  -11712161621 -155841191574 132416868332 0.999861973
## 10  Day      14-10  112063340811  -20359463111 244486144733 0.135728430
## 11  Day      17-10   96925038948  -31621132862 225471210758 0.229044302
## 12  Day      20-10  -15620489475 -159749519428 128508540478 0.999434914
## 13  Day      17-14  -15138301863 -138326006898 108049403172 0.998961481
## 14  Day      20-14 -127683830286 -267054808828  11687148257 0.087735009
## 15  Day      20-17 -112545528423 -248238523631  23147466785 0.150039096

Significant Tukey Post Hoc Test Values

tukey %>%
  filter(adj.p.value<0.05) %>% 
  arrange(adj.p.value)
##   term comparison     estimate    conf.low    conf.high adj.p.value
## 1  Day       14-1 201347532652 61976554110 340718511195 0.001588987
## 2  Day       17-1 186209230790 50516235582 321902225997 0.002888917
test8 <- test7 %>% 
  filter(!Day == "20")

fit <- lm(particle_conc ~ Weight ,data = test8)

summary(fit)
## 
## Call:
## lm(formula = particle_conc ~ Weight, data = test8)
## 
## Residuals:
##        Min         1Q     Median         3Q        Max 
## -2.600e+11 -7.122e+10 -2.456e+10  8.690e+10  2.239e+11 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)   
## (Intercept) 5.768e+10  7.666e+10   0.752  0.45727   
## Weight      8.548e+09  2.849e+09   3.000  0.00519 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.127e+11 on 32 degrees of freedom
## Multiple R-squared:  0.2196, Adjusted R-squared:  0.1952 
## F-statistic: 9.002 on 1 and 32 DF,  p-value: 0.00519
tidy(summary(fit))
##          term    estimate   std.error statistic     p.value
## 1 (Intercept) 57684926799 76661538848 0.7524624 0.457272844
## 2      Weight  8547844440  2848941108 3.0003584 0.005189953
test7 %>% 
  ggplot(aes(x= Weight, y = particle_conc))+
  geom_point()+
  geom_smooth()+
  xlab("\nWeight (g)\n") + # X axis label
  ylab("Exosomes/ml\n") + # Y axis label
  ggtitle("Linear Regression of Exosome \nConcentration vs. Weight")+ #title
  labs(color="Day")#Label table title
## `geom_smooth()` using method = 'loess'

test7 %>% 
  filter(!Sample_ID == '70') %>% 
  ggplot(aes(x= Weight, y = particle_conc))+
  geom_point(size = 3,aes(color=factor(Day)))+
  geom_smooth(method = "lm", level = 0.95)+
  xlab("\nWeight (g)\n") + # X axis label
  ylab("Exosomes/ml\n") + # Y axis label
  ggtitle("Linear Regression of Exosome \nConcentration vs. Weight")+ #title
  labs(color="Day")#Label table title

test7 %>% 
  ggplot(aes(x= Pups, y = particle_conc))+
  geom_point(size = 4, aes(color=factor(Day)))+
  #geom_smooth(method = "lm", level = 0.95)+
  scale_x_continuous(breaks=seq(0,12,2))+
  xlab("\nNumbwe of Pups\n") + # X axis label
  ylab("Exosomes/ml\n") + # Y axis label
  ggtitle("Linear Regression of Exosome \nConcentration vs. Number of Pups")+ #title
  labs(color="Day")#Label table title

mean_placenta <- tc %>% 
  filter(Day %in% c('10','14','17') & !Sample_ID %in% c('70','73','74','75')) %>%
  select(-(TEI_Day:Pup_right),-Resorp) %>% 
  gather("Placenta_avg","Plac_weight", 3:5) %>%
  group_by(Day,Sample_ID) %>% 
  summarise(N = length(Plac_weight),
            mean_plac = mean(Plac_weight*1000, na.rm = TRUE), #convert g to mg
            sd = sd(Plac_weight)*1000,  #convert g to mg
            se = sd/sqrt(N))

mean_placenta %>% 
  inner_join(test7) %>% 
  ggplot(aes(x= mean_plac, y = particle_conc))+
  geom_point(size= 3,aes(color=factor(Day)))+
  geom_smooth(method = "lm", level = 0.95)+
  xlab("\nPlacental Weight (mg)\n") + # X axis label
  ylab("\nExosomes/ml\n") + # Y axis label
  ggtitle("Plasma Exosome Concentration\nThroughout Pregnancy\n")+ #title
  labs(color="G.D.")#Label table title
## Joining, by = c("Day", "Sample_ID")
## Warning in inner_join_impl(x, y, by$x, by$y, suffix$x, suffix$y): joining
## factor and character vector, coercing into character vector

mean_placenta %>% 
  inner_join(test7) %>% 
  ggplot(aes(x = Pups, y = particle_conc))+
  geom_point(size= 3,aes(color=factor(Day)))+
  geom_smooth(method = "lm", se= FALSE)+
  facet_wrap(~Day)+
  scale_x_continuous(breaks=seq(1,12,2))+
  xlab("\nNumbr of Pups\n") + # X axis label
  ylab("\nExosomes/ml\n") + # Y axis label
  ggtitle("Plasma Exosome Concentration\nThroughout Pregnancy\n")+ #title
  labs(color="G.D.")#Label table title
## Joining, by = c("Day", "Sample_ID")
## Warning in inner_join_impl(x, y, by$x, by$y, suffix$x, suffix$y): joining
## factor and character vector, coercing into character vector

test3 %>% 
  filter(!Day %in% c('1','20')) %>% 
  group_by(Day) %>% 
  ggplot(aes(factor(Day),particle_conc, color=Day)) +
  geom_point(size=3)+
  xlab("\nDay of Gestation\n") + # X axis label
  ylab("\nExosomes/ml\n") + # Y axis label
  ggtitle("Plasma Exosome Concentration\nThroughout Pregnancy\n")+ #title
  labs(color="Condition")#Label table title